In [2]:
import numpy as np
import pandas as pd
import os
filepath = r'C:\Users\USER\Desktop'
filename01 = 'creditcard.csv'
df_full = pd.read_csv(os.path.join(filepath, filename01))
In [3]:
df_full
Out[3]:
            Time         V1         V2         V3  ...       V27       V28  Amount  Class
0            0.0  -1.359807  -0.072781   2.536347  ...  0.133558 -0.021053  149.62      0
1            0.0   1.191857   0.266151   0.166480  ... -0.008983  0.014724    2.69      0
2            1.0  -1.358354  -1.340163   1.773209  ... -0.055353 -0.059752  378.66      0
3            1.0  -0.966272  -0.185226   1.792993  ...  0.062723  0.061458  123.50      0
4            2.0  -1.158233   0.877737   1.548718  ...  0.219422  0.215153   69.99      0
...          ...        ...        ...        ...  ...       ...       ...     ...    ...
284802  172786.0 -11.881118  10.071785  -9.834783  ...  0.943651  0.823731    0.77      0
284803  172787.0  -0.732789  -0.055080   2.035030  ...  0.068472 -0.053527   24.79      0
284804  172788.0   1.919565  -0.301254  -3.249640  ...  0.004455 -0.026561   67.88      0
284805  172788.0  -0.240440   0.530483   0.702510  ...  0.108821  0.104533   10.00      0
284806  172792.0  -0.533413  -0.189733   0.703337  ... -0.002415  0.013649  217.00      0

284807 rows × 31 columns
In [4]:
df_full.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
Time 284807 non-null float64
V1 284807 non-null float64
V2 284807 non-null float64
V3 284807 non-null float64
V4 284807 non-null float64
V5 284807 non-null float64
V6 284807 non-null float64
V7 284807 non-null float64
V8 284807 non-null float64
V9 284807 non-null float64
V10 284807 non-null float64
V11 284807 non-null float64
V12 284807 non-null float64
V13 284807 non-null float64
V14 284807 non-null float64
V15 284807 non-null float64
V16 284807 non-null float64
V17 284807 non-null float64
V18 284807 non-null float64
V19 284807 non-null float64
V20 284807 non-null float64
V21 284807 non-null float64
V22 284807 non-null float64
V23 284807 non-null float64
V24 284807 non-null float64
V25 284807 non-null float64
V26 284807 non-null float64
V27 284807 non-null float64
V28 284807 non-null float64
Amount 284807 non-null float64
Class 284807 non-null int64
dtypes: float64(30), int64(1)
memory usage: 67.4 MB
In [5]:
df_full[df_full.Class ==1]
Out[5]:
            Time        V1        V2        V3  ...       V27       V28   Amount  Class
541        406.0 -2.312227  1.951992 -1.609851  ...  0.261145 -0.143276     0.00      1
623        472.0 -3.043541 -3.157307  1.088463  ... -0.252773  0.035764   529.00      1
4920      4462.0 -2.303350  1.759247 -0.359745  ...  0.039566 -0.153029   239.93      1
6108      6986.0 -4.397974  1.358367 -2.592844  ... -0.827136  0.849573    59.00      1
6329      7519.0  1.234235  3.019740 -4.304597  ... -0.010016  0.146793     1.00      1
...          ...       ...       ...       ...  ...       ...       ...      ...    ...
279863  169142.0 -1.927883  1.125653 -4.518331  ...  0.292680  0.147968   390.00      1
280143  169347.0  1.378559  1.289381 -5.004247  ...  0.389152  0.186637     0.76      1
280149  169351.0 -0.676143  1.126366 -2.213700  ...  0.385107  0.194361    77.89      1
281144  169966.0 -3.113832  0.585864 -5.399730  ...  0.884876 -0.253700   245.00      1
281674  170348.0  1.991976  0.158476 -2.583441  ...  0.002988 -0.015309    42.53      1

492 rows × 31 columns
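The filter above returns 492 fraudulent rows out of 284,807, roughly 0.17% of all transactions, so the classes are severely imbalanced. A minimal sketch of how to quantify this directly, assuming df_full is loaded as above:

df_full['Class'].value_counts()   # 0: 284315, 1: 492
df_full['Class'].mean()           # fraction of fraud cases, ~0.0017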
In [6]:
from sklearn.utils import shuffle
df_full.drop('Time', axis=1, inplace = True)
shuffle_df = shuffle(df_full, random_state=42)
num_total_cases = len(df_full)
df_train = shuffle_df[0:227845]
df_test = shuffle_df[227845:]
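Here 227845 is about 80% of the 284,807 rows, so this amounts to an 80/20 train/test split. A sketch of the same split written in terms of num_total_cases rather than a hard-coded index (an equivalent formulation, not what was run above):

train_size = int(num_total_cases * 0.8)   # 227845 for 284807 rows
df_train = shuffle_df[:train_size]
df_test = shuffle_df[train_size:]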
In [7]:
train_feature = np.array(df_train.values[:,0:29])
train_label = np.array(df_train.values[:,-1])
test_feature = np.array(df_test.values[:,0:29])
test_label = np.array(df_test.values[:,-1])
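With 'Time' dropped, positions 0:29 cover V1-V28 plus Amount, and the last column is Class. A name-based version of the same selection (a sketch equivalent to the slicing above) makes that explicit:

feature_cols = [c for c in df_train.columns if c != 'Class']   # V1..V28, Amount
train_feature = df_train[feature_cols].values
train_label = df_train['Class'].values
test_feature = df_test[feature_cols].values
test_label = df_test['Class'].values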
In [8]:
train_feature
Out[8]:
array([[ -1.65265066e+01, 8.58497180e+00, -1.86498532e+01, ...,
-2.01857525e+00, -1.04280417e+00, 3.64190000e+02],
[ 3.39812064e-01, -2.74374524e+00, -1.34069511e-01, ...,
4.09958027e-02, 1.02037825e-01, 5.20120000e+02],
[ 1.39959027e+00, -5.90701288e-01, 1.68618940e-01, ...,
1.14086454e-02, 4.63414166e-03, 3.10000000e+01],
...,
[ -1.63679912e+00, -2.88001171e-01, 3.04408460e+00, ...,
-2.19883189e-02, 1.43121533e-01, 7.81300000e+01],
[ 1.23933977e+00, -1.52563018e-01, -1.03219327e-01, ...,
-9.22603427e-02, -2.78862601e-02, 2.00000000e+00],
[ 3.39286374e-02, 6.62453224e-01, -1.99422851e+00, ...,
-6.26921714e-01, -2.67546624e-01, 7.60000000e-01]])
In [9]:
train_feature.shape
Out[9]:
(227845, 29)
In [10]:
train_label
Out[10]:
array([ 1., 0., 0., ..., 0., 0., 0.])
In [11]:
train_label.shape
Out[11]:
(227845,)
In [12]:
test_feature.shape
Out[12]:
(56962, 29)
In [13]:
test_label.shape
Out[13]:
(56962,)
In [14]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(train_feature)
train_feature_trans = scaler.transform(train_feature)
scaler.fit(test_feature)
test_feature_trans = scaler.transform(test_feature)
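Note that the scaler is re-fitted on the test features here, so train and test are scaled by different minima and maxima. The more common pattern is to fit only on the training data and reuse that scaler for the test set; a minimal sketch, not what was run above:

scaler = MinMaxScaler()
train_feature_trans = scaler.fit_transform(train_feature)
test_feature_trans = scaler.transform(test_feature)   # reuse the training-set scaling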
In [15]:
train_feature_trans
Out[15]:
array([[ 6.15096846e-01, 8.42768889e-01, 3.49045387e-01, ...,
5.91827193e-01, 3.10742520e-01, 1.92591222e-02],
[ 9.57164719e-01, 7.04498838e-01, 7.79009635e-01, ...,
6.51149915e-01, 3.44089730e-01, 2.75050238e-02],
[ 9.78658211e-01, 7.30777321e-01, 7.86038515e-01, ...,
6.50297703e-01, 3.41252534e-01, 1.63934426e-03],
...,
[ 9.17076826e-01, 7.34471857e-01, 8.52811137e-01, ...,
6.49335756e-01, 3.45286425e-01, 4.13167636e-03],
[ 9.75408151e-01, 7.36124917e-01, 7.79726023e-01, ...,
6.47311680e-01, 3.40305273e-01, 1.05764146e-04],
[ 9.50961060e-01, 7.46072410e-01, 7.35813956e-01, ...,
6.31911596e-01, 3.33324393e-01, 4.01903755e-05]])
In [28]:
######################### Build the model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import matplotlib.pyplot as plt

def show_train_history(train_history, train, validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

model = Sequential()  # layers are stacked one after another, in order

# Input layer (hidden layer 1)
model.add(Dense(units=200,
                input_dim=29,
                kernel_initializer='uniform',
                activation='relu'))
model.add(Dropout(0.5))

# Hidden layer 2: no input_dim needed, it is inferred from the previous layer's units
model.add(Dense(units=200,
                kernel_initializer='uniform',
                activation='relu'))
model.add(Dropout(0.5))

# Output layer
model.add(Dense(units=1,  # a single output value
                kernel_initializer='uniform',
                activation='sigmoid'))

print(model.summary())  # gives a clear view of the model and its parameter counts

model.compile(loss='binary_crossentropy',  # binary classification uses binary cross-entropy
              optimizer='adam', metrics=['accuracy'])

train_history = model.fit(x=train_feature_trans, y=train_label,  # the further train/validation split is built into Keras (validation_split)
                          validation_split=0.5, epochs=20,
                          batch_size=50000, verbose=2)  # verbose=2 prints one line per epoch

######################### Visualize the training history
show_train_history(train_history, 'acc', 'val_acc')
show_train_history(train_history, 'loss', 'val_loss')

######################### Evaluate on the test set
scores = model.evaluate(test_feature_trans, test_label)
print('\n')
print('accuracy=', scores[1])

######################### Record the model's predictions (the "answer sheet")
prediction = model.predict_classes(test_feature_trans)

# Save the trained weights
#model.save_weights("Keras_CreditCardFraud_MLP.h5")
#print('model saved to disk')
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_4 (Dense)              (None, 200)               6000
_________________________________________________________________
dropout_3 (Dropout)          (None, 200)               0
_________________________________________________________________
dense_5 (Dense)              (None, 200)               40200
_________________________________________________________________
dropout_4 (Dropout)          (None, 200)               0
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 201
=================================================================
Total params: 46,401
Trainable params: 46,401
Non-trainable params: 0
_________________________________________________________________
None
Train on 113922 samples, validate on 113923 samples
Epoch 1/20
0s - loss: 0.6872 - acc: 0.6792 - val_loss: 0.6523 - val_acc: 0.9983
Epoch 2/20
0s - loss: 0.6385 - acc: 0.9983 - val_loss: 0.5823 - val_acc: 0.9983
Epoch 3/20
0s - loss: 0.5604 - acc: 0.9983 - val_loss: 0.4748 - val_acc: 0.9983
Epoch 4/20
0s - loss: 0.4454 - acc: 0.9983 - val_loss: 0.3356 - val_acc: 0.9983
Epoch 5/20
0s - loss: 0.3044 - acc: 0.9983 - val_loss: 0.1934 - val_acc: 0.9983
Epoch 6/20
0s - loss: 0.1703 - acc: 0.9983 - val_loss: 0.0882 - val_acc: 0.9983
Epoch 7/20
0s - loss: 0.0779 - acc: 0.9983 - val_loss: 0.0355 - val_acc: 0.9983
Epoch 8/20
0s - loss: 0.0333 - acc: 0.9983 - val_loss: 0.0171 - val_acc: 0.9983
Epoch 9/20
0s - loss: 0.0174 - acc: 0.9983 - val_loss: 0.0125 - val_acc: 0.9983
Epoch 10/20
0s - loss: 0.0130 - acc: 0.9983 - val_loss: 0.0121 - val_acc: 0.9983
Epoch 11/20
0s - loss: 0.0124 - acc: 0.9983 - val_loss: 0.0127 - val_acc: 0.9983
Epoch 12/20
0s - loss: 0.0127 - acc: 0.9983 - val_loss: 0.0134 - val_acc: 0.9983
Epoch 13/20
0s - loss: 0.0131 - acc: 0.9983 - val_loss: 0.0140 - val_acc: 0.9983
Epoch 14/20
0s - loss: 0.0139 - acc: 0.9983 - val_loss: 0.0144 - val_acc: 0.9983
Epoch 15/20
0s - loss: 0.0141 - acc: 0.9983 - val_loss: 0.0146 - val_acc: 0.9983
Epoch 16/20
0s - loss: 0.0138 - acc: 0.9983 - val_loss: 0.0146 - val_acc: 0.9983
Epoch 17/20
0s - loss: 0.0145 - acc: 0.9983 - val_loss: 0.0145 - val_acc: 0.9983
Epoch 18/20
0s - loss: 0.0140 - acc: 0.9983 - val_loss: 0.0143 - val_acc: 0.9983
Epoch 19/20
0s - loss: 0.0136 - acc: 0.9983 - val_loss: 0.0140 - val_acc: 0.9983
Epoch 20/20
0s - loss: 0.0136 - acc: 0.9983 - val_loss: 0.0136 - val_acc: 0.9983
56544/56962 [============================>.] - ETA: 0s
accuracy= 0.998156665847
56544/56962 [============================>.] - ETA: 0s
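model.predict_classes thresholds the single sigmoid output at 0.5. In newer Keras releases, where predict_classes has been removed, an equivalent sketch would be:

prediction = (model.predict(test_feature_trans) > 0.5).astype('int32').ravel()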
In [29]:
df_ans = pd.DataFrame({'Real Class' :test_label})
df_ans['Prediction'] = prediction
In [30]:
df_ans[ df_ans['Real Class'] != df_ans['Prediction'] ]
Out[30]:
       Real Class  Prediction
1420          1.0           0
1572          1.0           0
1589          1.0           0
2112          1.0           0
3770          1.0           0
...           ...         ...
52246         1.0           0
53703         1.0           0
55933         1.0           0
56465         1.0           0
56767         1.0           0

105 rows × 2 columns
In [31]:
df_ans['Prediction'].value_counts()  # not a single 1 was predicted, which is terrible
Out[31]:
0 56962
Name: Prediction, dtype: int64
In [32]:
df_ans['Real Class'].value_counts()
Out[32]:
0.0 56857
1.0 105
Name: Real Class, dtype: int64
In [33]:
prediction_train = model.predict_classes(train_feature)
226496/227845 [============================>.] - ETA: 0s
In [34]:
df_train_ans = pd.DataFrame({'Real Class' :train_label})
df_train_ans['Prediction'] = prediction_train
In [35]:
df_train_ans[ df_train_ans['Real Class'] != df_train_ans['Prediction'] ]
Out[35]:
        Real Class  Prediction
0              1.0           0
565            1.0           0
3025           1.0           0
3228           1.0           0
4178           1.0           0
...            ...         ...
224810         1.0           0
224957         1.0           0
226113         1.0           0
227489         1.0           0
227556         1.0           0

387 rows × 2 columns
In [36]:
df_train_ans['Prediction'].value_counts()
Out[36]:
0 227845
Name: Prediction, dtype: int64
In [37]:
df_train_ans['Real Class'].value_counts()
Out[37]:
0.0 227458
1.0 387
Name: Real Class, dtype: int64
In [38]:
import seaborn as sns
%matplotlib inline
cols = ['Real_Class_1','Real_Class_0'] #Gold standard
rows = ['Prediction_1','Prediction_0'] #diagnostic tool (our prediction)
B1P1 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B1P0 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B0P1 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 0)])
B0P0 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 0)])
conf = np.array([[B1P1,B0P1],[B1P0,B0P0]])
df_cm = pd.DataFrame(conf, columns = [i for i in cols], index = [i for i in rows])
f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(df_cm, annot=True, ax=ax)
ax.xaxis.set_ticks_position('top') #Making x label be on top is common in textbooks.
print('total test case number: ', np.sum(conf))
total test case number: 56962
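As a cross-check, scikit-learn's confusion_matrix yields the same four counts, though with rows as the true class and columns as the predicted class, i.e. arranged differently from df_cm above; a sketch:

from sklearn.metrics import confusion_matrix
print(confusion_matrix(df_ans['Real Class'], df_ans['Prediction']))
# expected here: [[56857     0]
#                 [  105     0]]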
In [39]:
def model_efficacy(conf):
    total_num = np.sum(conf)
    sen = conf[0][0] / (conf[0][0] + conf[1][0])
    spe = conf[1][1] / (conf[1][0] + conf[1][1])
    false_positive_rate = conf[0][1] / (conf[0][1] + conf[1][1])
    false_negative_rate = conf[1][0] / (conf[0][0] + conf[1][0])

    print('total_num: ', total_num)
    print('G1P1: ', conf[0][0])  # G = gold standard; P = prediction
    print('G0P1: ', conf[0][1])
    print('G1P0: ', conf[1][0])
    print('G0P0: ', conf[1][1])
    print('##########################')
    print('sensitivity: ', sen)
    print('specificity: ', spe)
    print('false_positive_rate: ', false_positive_rate)
    print('false_negative_rate: ', false_negative_rate)

    return total_num, sen, spe, false_positive_rate, false_negative_rate

model_efficacy(conf)
model_efficacy(conf)
total_num: 56962
G1P1: 0
G0P1: 0
G1P0: 105
G0P0: 56857
##########################
sensitivity: 0.0
specificity: 0.998156665847
false_positive_rate: 0.0
false_negative_rate: 1.0
Out[39]:
(56962, 0.0, 0.99815666584740703, 0.0, 1.0)
In fact this is pretty much a failure: the model simply predicts 0 for every transaction.
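One standard mitigation for this kind of imbalance is to make fraud examples count more during training, for example via Keras's class_weight argument. A sketch of how the fit call above could be weighted (a hypothetical variation, not something run in this notebook; the weight 578 is roughly the non-fraud/fraud ratio 284315/492):

class_weight = {0: 1.0, 1: 578.0}   # up-weight the rare fraud class
train_history = model.fit(x=train_feature_trans, y=train_label,
                          validation_split=0.5, epochs=20,
                          batch_size=50000, verbose=2,
                          class_weight=class_weight)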
In [ ]:
Content source: Pytoddler/Kaggle-competition